package com.vr.cloudpicture.api.imagesearch.sub;

import com.vr.cloudpicture.exception.BusinessException;
import com.vr.cloudpicture.exception.ErrorCode;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * 获取图像firstUrl API
 *
 * @author hzh
 * @date 2025/05/20
 */
@Slf4j
public class GetImageFirstUrlApi {
    /**
     * 获取图像列表页面地址
     *
     * @param url 网址
     * @return {@code String }
     */
    public static String getImageFirstUrl(String url) {
        try {
            // 1.通过jsoup获取html内容
            Document document = Jsoup.connect(url)
                    .timeout(5000)
                    .get();
            // 2.获取所有script标签
            Elements scriptElements = document.getElementsByTag("script");
            // 便利遍历script标签找到包含firstUrl的内容
            for (Element scriptElement : scriptElements) {
                String scriptContent = scriptElement.html();
                if (scriptContent.contains("\"firstUrl\"")) {
                    // 3.用正则获取firstUrl的值
                    Pattern pattern = Pattern.compile("\"firstUrl\"\\s*:\\s*\"(.*?)\"");
                    Matcher matcher = pattern.matcher(scriptContent);
                    if (matcher.find()) {
                        String firstUrl = matcher.group(1);
                        // 处理转义字符
                        firstUrl = firstUrl.replace("\\/", "/");
                        return firstUrl;
                    }
                }
            }
            throw new BusinessException(ErrorCode.OPERATION_ERROR,"未找到url");
        }catch (Exception e) {
            log.error("搜索失败",e);
            throw new BusinessException(ErrorCode.OPERATION_ERROR, "搜索失败");
        }
    }
    public static void main(String[] args) {
        System.out.println(getImageFirstUrl("https://graph.baidu.com/s?card_key=&entrance=GENERAL&extUiData[isLogoShow]=1&f=all&isLogoShow=1&session_id=1996457687670974128&sign=12680b0520a753a8672ef01747747419&tpl_from=pc"));
    }
}
